import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class SunSpider(CrawlSpider):
    """Crawl spider that follows article-style links from the 12306 index page.

    Starting from ``start_urls``, every link whose URL matches the pattern
    held by ``link`` is requested, passed to :meth:`parse_item`, and scanned
    again for further matching links (``follow=True``).
    """

    name = 'sun'
    # NOTE(review): ``allowed_domains`` is not set, so matching links are
    # presumably followed regardless of their host -- confirm whether the
    # crawl should be restricted to the 12306 site.
    start_urls = ['https://www.12306.cn/mormhweb/1/13/index_fl.html']

    # Link extractor: selects the links whose URL matches the ``allow`` regex.
    # The dot before "html" is escaped so that it only matches a literal "."
    # rather than any character.
    link = LinkExtractor(allow=r'/\d+/t\d+_\d+\.html')

    rules = (
        # Rule: send each extracted link's response to ``parse_item`` and keep
        # extracting links from the pages that are fetched.
        Rule(link, callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        """Handle the response of a followed link.

        Args:
            response: The response object Scrapy passes to the callback.

        Returns:
            dict: The scraped item. It is currently always empty because no
            fields are extracted yet.
        """
        item = {}
        # Use the spider's logger instead of print() so the output respects
        # Scrapy's logging configuration.
        self.logger.debug('Parsed response: %s', response)
        # TODO: populate ``item`` with fields extracted from ``response``.
        return item
